# Importing required libraries
import pyedflib
import networkx as nx
import connectivipy as cp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import community
from collections import defaultdict, Counter
from scipy import signal
import warnings
warnings.filterwarnings('ignore')
# eyes open ------------------------------------------------------------
# Load the "eyes open" baseline run.
# (Presumably PhysioNet EEG Motor Movement/Imagery naming: R01 = eyes open,
# R02 = eyes closed — TODO confirm against the dataset documentation.)
filename = "../input/S064R01.edf"
f = pyedflib.EdfReader(filename)
n = f.signals_in_file          # number of channels in the recording
labels = f.getSignalLabels()   # channel names (padded with dots, e.g. "O1..")
# One row per channel, one column per sample of channel 0
sigbufs = np.zeros((n, f.getNSamples()[0]))
for i in np.arange(n):
    sigbufs[i, :] = f.readSignal(i)
signal_open = sigbufs
freq_open = f.getSampleFrequency(0)  # sampling rate of channel 0 (assumed equal for all channels)
f._close()  # NOTE(review): pyedflib exposes a public close(); _close() looks private — confirm
# eyes closed ------------------------------------------------------------
filename = "../input/S064R02.edf"
f = pyedflib.EdfReader(filename)
# Reuses n from the first file: both runs are assumed to have the same channel count
sigbufs = np.zeros((n, f.getNSamples()[0]))
for i in np.arange(n):
    sigbufs[i, :] = f.readSignal(i)
signal_closed = sigbufs
freq_closed = f.getSampleFrequency(0)
f._close()
# ------------------------------------------------------------------------
print("number of signals: \n\t", n)
print("name of all signals: \n\t", labels)
# NOTE(review): f was already _close()d above; this relies on the header being cached
print("time: \n\t", f.getNSamples()[0])
# dictinary to map idx and name of signals. (This will be used coming question Q1.4)
label_dict = {idx: signal for idx, signal in enumerate(labels)}
del f
signal_open.shape, signal_closed.shape  # notebook-style shape inspection (no effect in a script)
def ChannelSelection(inputSignal, inputfreq, inputLobes, nps=160, plot=True):
    """Pick the channel (among inputLobes) with the strongest alpha-band PSD.

    The power spectral density is estimated with Welch's method
    (scipy.signal.welch, per the scipy documentation).

    Parameters
    ----------
    inputSignal : ndarray, shape (n_channels, n_samples)
    inputfreq : float — sampling frequency in Hz
    inputLobes : iterable of int — channel indices to inspect
    nps : int — nperseg (Welch segment length)
    plot : bool — if True (default, original behavior), plot each channel's
        alpha-band PSD and show the figure at the end

    Returns
    -------
    (bestchannel, optimumAlphafreq) : channel index with the highest
    alpha-band peak, and the frequency (Hz) of that peak.
    """
    bestchannel, optimumAlphafreq, max_psd = 0, 0, 0
    for i in inputLobes:
        f, Pxx_den = signal.welch(x=inputSignal[i, :], fs=inputfreq, nperseg=nps)
        # frequency range for alpha waves (8-13 Hz), prominent with eyes closed
        alphaWaveRange = np.where((f >= 8) & (f <= 13))
        alpha_psd = Pxx_den[alphaWaveRange]
        if plot:
            plt.title('Alpha waves with their PSD in Occipetal Lobe')
            plt.plot(f[alphaWaveRange], alpha_psd)
        # Index of the strongest peak WITHIN the alpha band.
        # (Bug fix: the original indexed the full PSD array with this
        # band-relative index, i.e. compared power at 0-5 Hz, not 8-13 Hz.)
        bestAlpha_index = np.argmax(alpha_psd)
        if alpha_psd[bestAlpha_index] > max_psd:
            max_psd = alpha_psd[bestAlpha_index]
            bestchannel = i
            # Peak frequency read off the actual frequency grid.
            # (Bug fix: the original used f[bestAlpha_index]+8, which is only
            # correct when the Welch bin spacing is exactly 1 Hz.)
            optimumAlphafreq = f[alphaWaveRange][bestAlpha_index]
    if plot:
        plt.show()
    return bestchannel, optimumAlphafreq
# Get only occipital lobes
# NOTE(review): matching a lowercase 'o' anywhere in the label also picks up
# parieto-occipital channels such as "Po.." — confirm this is intended.
O_Lobes = list(idx for idx, name in enumerate(labels) if 'o' in name.lower())
print("Occipital indexes:", " ".join(map(str, O_Lobes)))
# Channel/frequency with the strongest alpha activity in the eyes-closed run
bestchannel, maxAlphafreq = ChannelSelection(inputSignal=signal_closed, inputfreq=160, inputLobes=O_Lobes)
print('The channel that is interesting is {} with the frequency {} hertz'.format(label_dict[bestchannel], int(maxAlphafreq)))
def create_connectivity_matrix(signals, labels, freq, estimator = "PDC", akaike_plot = False, significance = False):
    """Fit an MVAR model to `signals` and return an (N, N) connectivity matrix.

    The matrix is the PDC or DTF spectrum evaluated at the globally selected
    alpha frequency (module-level `maxAlphafreq`), with the diagonal zeroed.

    Parameters
    ----------
    signals : ndarray (N, n_samples) — multichannel EEG
    labels : channel names, length N
    freq : sampling frequency in Hz
    estimator : "PDC" or "DTF"
    akaike_plot : if True, plot the Akaike criterion over candidate orders
    significance : if True, zero out entries not significantly different
        from 0 (surrogate test, Nrep=100, alpha=0.05) — used in Q1.4

    Raises
    ------
    ValueError if `estimator` is neither "PDC" nor "DTF".
    """
    # 1. Build MVAR model --------------------------------------------------------------------------------------------
    mv = cp.Mvar
    # 2. Find an optimal order p via the Akaike criterion ------------------------------------------------------------
    #    crit holds the criterion value for each order starting from 1
    best_order, crit = mv.order_akaike(data=signals, p_max=20, method='yw') # vm, ns also work
    print("best order by AIC:", best_order)
    if akaike_plot:
        plt.plot(1+np.arange(len(crit)), crit, 'g')
        plt.show()
    # 3. Fit the MVAR model using the obtained optimal order p -------------------------------------------------------
    data = cp.Data(data = signals, fs = freq, chan_names = labels )  # fs: sampling frequency
    data.fit_mvar(p = best_order, method = 'yw')  # p: estimation order
    # 4. Obtain coefficients and residual errors ---------------------------------------------------------------------
    # acoef: fitted parameters, shape (p, N, N); vcoef: residual matrix, shape (N, N)
    acoef, vcoef = data.mvar_coefficients
    # 5. Apply these parameters to the requested estimator -----------------------------------------------------------
    if estimator == "PDC":
        est = cp.conn.PDC()
    elif estimator == "DTF":
        est = cp.conn.DTF()
    else:
        # Bug fix: an unknown estimator used to leave `val` undefined and
        # crash later with UnboundLocalError; fail fast instead.
        raise ValueError("estimator must be 'PDC' or 'DTF', got {!r}".format(estimator))
    val = est.calculate(acoef, vcoef, freq)
    # 6. Pick the matrix at the best (alpha) frequency ---------------------------------------------------------------
    #    NOTE(review): indexing with int(best_frequency) assumes ~1 Hz bin
    #    spacing in the estimator output — confirm against connectivipy docs.
    best_frequency = maxAlphafreq
    matrix = val[int(best_frequency), :, :].reshape((len(labels), len(labels)))
    np.fill_diagonal(matrix, 0)
    # Compute significance (used in Question 1.4) --------------------------------------------------------------------
    if significance:
        print("compute significance -------------------------------------------------------")
        # data.significance() operates on the estimator last computed via data.conn()
        if estimator == "PDC":
            values = data.conn('pdc')
        else:
            values = data.conn('dtf')
        significance_matrix = data.significance(Nrep=100, alpha=0.05)
        # Filter out values that are not significantly different from 0
        matrix[significance_matrix > 0.05] = 0
    return matrix
matrix_open_dtf = create_connectivity_matrix(signal_open, labels, freq_open, estimator="DTF")
# NOTE(review): freq_open is passed for the eyes-closed run too — likely a
# copy/paste slip (freq_closed exists); harmless only if both rates are equal.
matrix_closed_dtf = create_connectivity_matrix(signal_closed, labels, freq_open, estimator="DTF")
# eyes open --------------------------------------------------------------------
# all values in matrix: try each one as a threshold until the binarized
# graph's density is within ~1% of the 20% target
thresholds = [item for sublist in matrix_open_dtf for item in sublist]
for threshold in thresholds:
    binary_open = (matrix_open_dtf > threshold).astype(int)
    G = nx.from_numpy_matrix(binary_open, create_using=nx.MultiDiGraph())
    density = nx.density(G)*100
    if density*0.99 < 20 and 20 < density*1.01: # Because it cannot be exact target density
        break
print("eyes open \t threshold:", round(threshold, 3), "\t density: ", round(density,0), "%")
# eyes closed --------------------------------------------------------------------
# all values in matrix
thresholds = [item for sublist in matrix_closed_dtf for item in sublist]
for threshold in thresholds:
    binary_closed = (matrix_closed_dtf > threshold).astype(int)
    G = nx.from_numpy_matrix(binary_closed, create_using=nx.MultiDiGraph())
    density = nx.density(G)*100
    if density*0.99 < 20 and 20 < density*1.01: # Because it cannot be exact target density
        break
print("eyes closed \t threshold:", round(threshold, 3), "\t density: ", round(density,0), "%")
"""
this function is to print and plot of connectivity matrix for both PDC and DTF
"""
def plot_binary_matrix(matrix_open, matrix_closed, density, estimator, file_name = None, save_file = False):
    """Show the EO/EC connectivity matrices side by side (optionally saving).

    Parameters
    ----------
    matrix_open, matrix_closed : (N, N) matrices for eyes open / eyes closed
    density : network density (%) shown in the subplot titles
    estimator : estimator name ("PDC"/"DTF") shown in the titles
    file_name : output path without extension (required when save_file=True)
    save_file : if True, save the figure as <file_name>.png before showing

    Raises
    ------
    ValueError if save_file is True and file_name is None.
    """
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    axes[0].imshow(matrix_open, cmap="viridis")
    axes[0].set_title(str(estimator) + " EO " + str(density) +"% density")
    axes[1].imshow(matrix_closed, cmap="viridis")
    axes[1].set_title(str(estimator) + " EC "+ str(density) + "% density")
    if save_file:
        # Bug fix: save_file=True with the default file_name=None used to
        # crash with a TypeError on string concatenation; fail clearly instead.
        if file_name is None:
            raise ValueError("file_name is required when save_file=True")
        plt.savefig(file_name + ".png")
    plt.show()
# Plot the ~20%-density binarized DTF matrices found above
plot_binary_matrix(binary_open, binary_closed, 20, "DTF")
matrix_open_pdc = create_connectivity_matrix(signal_open, labels, freq_open, estimator="PDC")
# NOTE(review): freq_open passed with the eyes-closed signal — confirm both runs share a rate
matrix_closed_pdc = create_connectivity_matrix(signal_closed, labels, freq_open, estimator="PDC")
# Target densities (%) for thresholding, from densest to sparsest
density_list = [50, 30, 20, 10, 5, 1]
dtf_binary_matrix = {"EO":{}, "EC":{}} # To keep the matrix for later questions
for idx, target_density in enumerate(density_list):
    thresholds = [item for sublist in matrix_open_dtf for item in sublist]
    for threshold in thresholds:
        # create binary adjacency matrix
        binary_open = (matrix_open_dtf > threshold).astype(int)
        G = nx.from_numpy_matrix(binary_open, create_using=nx.MultiDiGraph())
        # compute density
        density = round(nx.density(G)*100, 0)
        if density == target_density:
            # NOTE(review): zeroes the weighted matrix IN PLACE, so later
            # (sparser) targets search an already-thresholded matrix — confirm intended.
            matrix_open_dtf[matrix_open_dtf < threshold] = 0
            break
    dtf_binary_matrix["EO"][target_density] = binary_open
    print("eyes open \t threshold:", round(threshold, 3), "\t density: ", density, "%")
    # eyes closed --------------------------------------------------------------------
    thresholds = [item for sublist in matrix_closed_dtf for item in sublist]
    for threshold in thresholds:
        binary_closed = (matrix_closed_dtf > threshold).astype(int)
        G = nx.from_numpy_matrix(binary_closed, create_using=nx.MultiDiGraph())
        density = round(nx.density(G)*100, 0)
        if density == target_density:
            matrix_closed_dtf[matrix_closed_dtf < threshold] = 0
            break
    dtf_binary_matrix["EC"][target_density] = binary_closed
    print("eyes closed \t threshold:", round(threshold, 3), "\t density: ", density, "%")
    name = "../output/1.3_DTF_" + str(target_density)
    plot_binary_matrix(binary_open, binary_closed, target_density, "DTF", file_name=name, save_file=True)
# Same density sweep as above, for the PDC matrices
pdc_binary_matrix = {"EO":{}, "EC":{}} # To keep the matrix for later questions
for idx, target_density in enumerate(density_list):
    thresholds = [item for sublist in matrix_open_pdc for item in sublist]
    for threshold in thresholds:
        binary_open = (matrix_open_pdc > threshold).astype(int)
        G = nx.from_numpy_matrix(binary_open, create_using=nx.MultiDiGraph())
        density = round(nx.density(G)*100, 0)
        if density == target_density:
            # in-place thresholding of the weighted matrix (see DTF sweep note)
            matrix_open_pdc[matrix_open_pdc < threshold] = 0
            break
    pdc_binary_matrix["EO"][target_density] = binary_open
    print("eyes open \t threshold:", round(threshold, 3), "\t density: ", density, "%")
    # eyes closed --------------------------------------------------------------------
    thresholds = [item for sublist in matrix_closed_pdc for item in sublist]
    for threshold in thresholds:
        binary_closed = (matrix_closed_pdc > threshold).astype(int)
        G = nx.from_numpy_matrix(binary_closed, create_using=nx.MultiDiGraph())
        density = round(nx.density(G)*100, 0)
        if density == target_density:
            matrix_closed_pdc[matrix_closed_pdc < threshold] = 0
            break
    pdc_binary_matrix["EC"][target_density] = binary_closed
    print("eyes closed \t threshold:", round(threshold, 3), "\t density: ", density, "%")
    name = "../output/1.3_PDC_" + str(target_density)
    plot_binary_matrix(binary_open, binary_closed, target_density, "PDC", file_name=name, save_file=True)
# The 19 channels of the standard 10-20 montage (labels padded with dots)
labels_19 = ["Fp1.", "Fp2.", "F7..", "F3..", "Fz..", "F4..", "F8..", "T7..", "C3..", "Cz..",
             "C4..", "T8..", "P7..", "P3..", "Pz..", "P4..", "P8..", "O1..", "O2.."]
# Create a dictinary including only 19 signals (key: index, value: name of signals)
label_19_dict = {key: signal for key, signal in label_dict.items() if signal in labels_19}
signals_idx_19 = list(label_19_dict.keys()) # Return the index of rows that we extract original signal logs
signal_open_19 = signal_open[signals_idx_19, :]
signal_closed_19 = signal_closed[signals_idx_19, :]
labels_19 = label_19_dict.values()  # rebind: labels in original-recording order
print(signal_open_19.shape, signal_closed_19.shape)
# Another dictionary (key: [0, 18] by order, value: name of signals)
# This is used in order to find the signals which will be filtered out by signanificnace values
label_19_dict = {idx: name for idx, name in enumerate(label_19_dict.values())}
# 19-channel PDC matrices with the surrogate significance filter applied
matrix_open_pdc_19 = create_connectivity_matrix(signal_open_19, labels_19, freq_open, estimator="PDC", significance=True)
matrix_closed_pdc_19 = create_connectivity_matrix(signal_closed_19, labels_19, freq_closed, estimator="PDC", significance=True)
# Heatmaps of the significance-filtered 19-channel PDC matrices
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
mat_open = axes[0].imshow(matrix_open_pdc_19, cmap="viridis")
axes[0].set_title("19 signals EO")
mat_closed = axes[1].imshow(matrix_closed_pdc_19, cmap="viridis")
axes[1].set_title("19 signals EC")
plt.show()
# Any value surviving the significance filter becomes an edge
binary_open_pdc_19 = (matrix_open_pdc_19 > 0).astype(int)
binary_closed_pdc_19 = (matrix_closed_pdc_19 > 0).astype(int)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
mat_open = axes[0].imshow(binary_open_pdc_19, cmap="viridis")
axes[0].set_title("19 signals EO")
mat_closed = axes[1].imshow(binary_closed_pdc_19, cmap="viridis")
axes[1].set_title("19 signals EC")
plt.show()
# Electrode 2D scalp coordinates (space-separated file: label, x, y, ...)
locations= pd.read_csv("../input/channel_locations.txt", sep=" ")[["label", "x", "y"]]
x=locations.x.values
y=locations.y.values
positions = {idx: [x[idx], y[idx]] for idx in range(len(locations))}
# regarding 19 signals ----------------------------------------------------------
# NOTE(review): assumes filtered rows keep the same order as labels_19, so the
# positional indices line up with label_19_dict — confirm against the input file.
locations_19 = locations.loc[locations['label'].isin(labels_19)]
x_19 = locations_19.x.values
y_19 = locations_19.y.values
positions_19 = {idx: [x_19[idx], y_19[idx]] for idx in range(len(locations_19))}
# Unfiltered (no significance test) 19-channel connectivity matrices
matrix_open_pdc_19 = create_connectivity_matrix(signal_open_19, labels_19, freq_open, estimator="PDC")
matrix_closed_pdc_19 = create_connectivity_matrix(signal_closed_19, labels_19, freq_closed, estimator="PDC")
matrix_open_dtf_19 = create_connectivity_matrix(signal_open_19, labels_19, freq_open, estimator="DTF")
matrix_closed_dtf_19 = create_connectivity_matrix(signal_closed_19, labels_19, freq_closed, estimator="DTF")
list_of_matrix = [matrix_open_pdc_19, matrix_closed_pdc_19, matrix_open_dtf_19, matrix_closed_dtf_19]
print(matrix_open_pdc_19.shape, matrix_closed_pdc_19.shape, matrix_open_dtf_19.shape, matrix_closed_dtf_19.shape )
# For each 19-channel matrix, find the threshold giving a 5%-density network
# and zero all weaker entries IN PLACE.
for idx, matrix in enumerate(list_of_matrix):
    thresholds = [item for sublist in matrix for item in sublist]
    for threshold in thresholds:
        binary_matrix = (matrix > threshold).astype(int)
        G = nx.from_numpy_matrix(binary_matrix, create_using=nx.MultiDiGraph())
        density = round(nx.density(G)*100, 0)
        if density == 5:
            rows = np.nonzero(binary_matrix == 0)[0] # index of row which is not connected
            cols = np.nonzero(binary_matrix == 0)[1] # index of column which is not connected
            # delete values in original matrix in order to make density 5%
            for index in range(len(rows)):
                matrix[rows[index]][cols[index]] = 0
            break
    list_of_matrix[idx] = matrix
estimators = ["PDC", "DTF"]
eyes = ["EO", "EC"]
# Draw the four pruned networks on the scalp layout (one figure per estimator)
for i in range(len(estimators)):
    fig, ax = plt.subplots(1, 2, figsize=((12, 5)))
    for idx, matrix in enumerate(list_of_matrix[i*2:(i+1)*2]):
        G = nx.from_numpy_matrix(matrix, create_using=nx.MultiDiGraph())
        # NOTE(review): G.node[...] is the networkx 1.x API (2.x uses G.nodes);
        # also this loop variable `n` shadows the module-level channel count.
        for n, coordinate in positions_19.items():
            G.node[n]['pos'] = coordinate
        nx.draw_networkx_nodes(G, positions_19, node_color='r', node_shape="h", alpha=0.8, node_size=450, ax = ax[idx])
        nx.draw_networkx_edges(G, positions_19, width=1.0, alpha=0.5, ax = ax[idx])
        nx.draw_networkx_labels(G, positions_19, label_19_dict, font_size=10, ax = ax[idx])
        name = estimators[i] + " " + eyes[idx]
        ax[idx].set_title(name)
        ax[idx].grid()
    plt.savefig("../output/1.5_" + estimators[i] + ".png")
    plt.show()
# Collect global and local graph indices for every estimator/eyes/density combination
df = pd.DataFrame(columns=["Estimator", "Eyes", "Density", "Clustering_Coefficient", "Path_Length",
                           "High_Degree", "High_In-Degree", "High_Out-Degree"])
estimator = ["PDC", "DTF"]
for i, matrix in enumerate([pdc_binary_matrix, dtf_binary_matrix]):
    for eyes in ["EO", "EC"]:
        for density in density_list:
            info_indices = [estimator[i], eyes, density]
            curr_matrix = matrix[eyes][density]
            # Global graph indices ----------------------------------------------------------------------------------------------------
            # Global Clustering Coefficient (computed on the undirected view)
            G = nx.from_numpy_matrix(curr_matrix)
            gcc = round(nx.average_clustering(G), 3)
            info_indices.append(gcc)
            # Average Path Length on the directed multigraph, isolated nodes removed
            G = nx.from_numpy_matrix(curr_matrix, create_using=nx.MultiDiGraph())
            G.remove_nodes_from(list(nx.isolates(G)))
            try:
                apl = round(nx.average_shortest_path_length(G),3)
            except:
                # NOTE(review): bare except also hides unrelated errors — presumably
                # only nx's "not connected" error is meant to be caught here.
                apl = "not fully connected"
            info_indices.append(apl)
            # Local graph indices: top-10 channel labels by (in/out) degree -----------------------------------------------------------
            degrees = G.degree()
            degrees = sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
            top10 = [label_dict[signal[0]] for signal in degrees[:10]]
            info_indices.append(top10)
            degrees = G.in_degree()
            degrees=sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
            top10 = [label_dict[signal[0]] for signal in degrees[:10] ]
            info_indices.append(top10)
            degrees = G.out_degree()
            degrees=sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
            top10 = [label_dict[signal[0]] for signal in degrees[:10] ]
            info_indices.append(top10)
            # append as a new row
            df.loc[len(df)] = info_indices
# Split the collected indices into global vs local tables
df_global = df[["Estimator", "Eyes", "Density", "Clustering_Coefficient", "Path_Length"]]
df_local = df[["Estimator", "Eyes", "Density", "High_Degree", "High_In-Degree", "High_Out-Degree"]]
df_global.to_csv("../output/2.3_df_global.csv", sep = "\t", index=False, header=True)
df_global
# Rows were appended in order PDC-EO, PDC-EC, DTF-EO, DTF-EC (6 densities each)
df_global_pdc_eo = df_global[0:6]
df_global_pdc_ec = df_global[6:12]
df_global_dtf_eo = df_global[12:18]
df_global_dtf_ec = df_global[18:24]
plt.figure(figsize = (8, 5))
ax = plt.gca()
df_global_dtf_ec.plot(kind='line', x='Density', y='Clustering_Coefficient', ax=ax, label = "DTF EC", color = "blue")
df_global_dtf_eo.plot(kind='line', x='Density', y='Clustering_Coefficient', ax=ax, label = "DTF EO", style = "--", color = "blue")
df_global_pdc_ec.plot(kind='line', x='Density', y='Clustering_Coefficient', ax=ax, label = "PDC EC", color = "red")
df_global_pdc_eo.plot(kind='line', x='Density', y='Clustering_Coefficient', ax=ax, label = "PDC EO", style = "--", color = "red")
plt.ylabel("Clustering Coefficient", fontsize=12)
plt.xlabel("Density", fontsize=12)
plt.title("Clustering Coefficient Comparison", fontsize=15)
plt.xticks([1, 5, 10, 20, 30, 50])
plt.xlim((0, 51))
plt.grid()
plt.savefig("../output/2.3_CC.png")
plt.show()
plt.figure(figsize = (8, 5))
ax = plt.gca()
# Only the first five densities — presumably the 1% row can hold the
# "not fully connected" string, which cannot be plotted; confirm.
df_global_dtf_ec[:5].plot(kind='line', x='Density', y='Path_Length', ax=ax, label = "DTF EC", color = "blue")
df_global_dtf_eo[:5].plot(kind='line', x='Density', y='Path_Length', ax=ax, label = "DTF EO", style = "--", color = "blue")
df_global_pdc_ec[:5].plot(kind='line', x='Density', y='Path_Length', ax=ax, label = "PDC EC", color = "red")
df_global_pdc_eo[:5].plot(kind='line', x='Density', y='Path_Length', ax=ax, label = "PDC EO", style = "--", color = "red")
plt.ylabel("Path Length", fontsize=12)
plt.xlabel("Density", fontsize=12)
plt.title("Path Length Comparison", fontsize=15)
plt.xticks([5, 10, 20, 30, 50])
plt.xlim((4, 51))
plt.grid()
plt.savefig("../output/2.3_PL.png")
plt.show()
df_local
# Export the local (hub) tables for the report
df_local[["Estimator", "Eyes", "Density", "High_Degree"]].to_csv("../output/2.5_local_degree.txt", sep = "\t", index=False, header=True)
df_local[["Estimator", "Eyes", "Density", "High_In-Degree"]].to_csv("../output/2.5_local_indegree.txt", sep = "\t", index=False, header=True)
df_local[["Estimator", "Eyes", "Density", "High_Out-Degree"]].to_csv("../output/2.5_local_outdegree.txt", sep = "\t", index=False, header=True)
estimator = ["PDC", "DTF"]
colors = ["red", "orange"]
for i, matrix in enumerate([pdc_binary_matrix, dtf_binary_matrix]):
for eyes in ["EO", "EC"]:
for density in [5, 20, 50]:
curr_matrix = matrix[eyes][density]
G = nx.from_numpy_matrix(curr_matrix, create_using=nx.MultiDiGraph())
fig, ax = plt.subplots(1, 3, figsize=((18, 5)))
# Degree -----------------------------------------------------------------------------------------------------
degrees = G.degree()
degrees = sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
top10_nodes = [label[0] for label in degrees[:10]]
# Plot the network
color_map = np.zeros(len(degrees))
for node in top10_nodes:
color_map[node] = 1
color_map = [colors[int(i)] for i in color_map]
nx.draw_networkx_nodes(G, positions, node_color = color_map, node_shape="h", node_size=450, alpha=0.8, ax=ax[0])
nx.draw_networkx_edges(G, positions, arrowstyle="-|>", width=0.6, alpha=0.2, ax=ax[0])
nx.draw_networkx_labels(G, positions, label_dict, font_size=10, alpha=0.1, ax=ax[0])
name = str(estimator[i]) + " Degree " + str(eyes) + " " + str(density) + "% density"
ax[0].set_title(name)
ax[0].grid()
plt.xlim((-0.6, 0.6))
# In-Degree -----------------------------------------------------------------------------------------------------
degrees = G.in_degree()
degrees=sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
top10_nodes = [label[0] for label in degrees[:10]]
# Plot the network
color_map = np.zeros(len(degrees))
for node in top10_nodes:
color_map[node] = 1
color_map = [colors[int(i)] for i in color_map]
nx.draw_networkx_nodes(G, positions, node_color = color_map, node_shape="h", node_size=450, alpha=0.8, ax=ax[1])
nx.draw_networkx_edges(G, positions, arrowstyle="-|>", width=0.6, alpha=0.2, ax=ax[1])
nx.draw_networkx_labels(G, positions, label_dict, font_size=10, alpha=0.1, ax=ax[1])
name = str(estimator[i]) + " In-Degree " + str(eyes) + " " + str(density) + "% density"
ax[1].set_title(name)
ax[1].grid()
plt.xlim((-0.6, 0.6))
# Out-Degree -----------------------------------------------------------------------------------------------------
degrees = G.out_degree()
degrees=sorted([(k, degrees[k]) for k in degrees], key=lambda x:x[1], reverse=True)
top10_nodes = [label[0] for label in degrees[:10]]
# Plot the network
color_map = np.zeros(len(degrees))
for node in top10_nodes:
color_map[node] = 1
color_map = [colors[int(i)] for i in color_map]
nx.draw_networkx_nodes(G, positions, node_color = color_map, node_shape="h", node_size=450, alpha=0.8, ax=ax[2])
nx.draw_networkx_edges(G, positions, arrowstyle="-|>", width=0.6, alpha=0.2, ax=ax[2])
nx.draw_networkx_labels(G, positions, label_dict, font_size=10, alpha=0.1, ax=ax[2])
name = str(estimator[i]) + " Out-Degree " + str(eyes) + " " + str(density) + "% density"
ax[2].set_title(name)
ax[2].grid()
plt.xlim((-0.6, 0.6))
name = str(estimator[i]) + " " + str(eyes) + " " + str(density)
plt.savefig("../output/2.5_" + name + ".png")
plt.show()
# A motif: a recurring subnetwork conjectured to have some significance, one that appears with a higher frequency than would be expected in "similar" random networks.
# Motifs are patterns of inter-connections occurring in complex networks at numbers significantly higher than those in randomized networks.
# Significantly under-represented subnetworks ("anti-motifs") may also be meaningful:
# an anti-motif occurs less often than expected in the randomized set.
"""
a function to text file of bridges for which nodes are connected
first, we save it as dataframe, and convert to txt.file
"""
def create_df_bridges(matrix):
    """Return a dataframe listing every directed edge of a binary adjacency matrix.

    Each row holds (source, destination, connected) for every cell equal to 1.
    The matrix is assumed square; `connected` is always 1 for a binary input.
    """
    edge_rows = []
    size = len(matrix)
    for src in range(size):
        for dst in range(size):
            if matrix[src][dst] == 1:
                edge_rows.append((src, dst, matrix[src][dst]))
    # Build the edge-list dataframe in one shot
    return pd.DataFrame(edge_rows, columns=["source", "destination", "connected"])
# Eyes Open ---------------------------------------------
# Export the 20%-density PDC edge lists in the format expected by mfinder
matrix = pdc_binary_matrix["EO"][20]
df = create_df_bridges(matrix)
df.to_csv("../motif/data/pdc_20_EO.txt", sep = "\t", index=False, header=False)
# Eyes Closed ---------------------------------------------
matrix = pdc_binary_matrix["EC"][20]
df = create_df_bridges(matrix)
df.to_csv("../motif/data/pdc_20_EC.txt", sep = "\t", index=False, header=False)
"""
function to create dataframe to see result of motif analysis
from the file we got by mfinder, we examine if the motif is significant or not.
"""
def motif_analysis(file):
    """Load mfinder motif statistics and label each motif's significance.

    A motif counts as "motif"/"anti-motif" when its real-network frequency
    lies above/below the Freq_Rand +/- 2*sigma band of the randomized
    networks (~95% significance level); otherwise "not significant".
    Prints the label counts and returns a dataframe with
    Motif_ID, Frequency and Statistical_Significance.
    """
    stats = np.loadtxt(file)
    # Columns of the mfinder output that we use (Z-score, p-value,
    # concentration and uniqueness are also present but not needed here).
    df = pd.DataFrame({'Motif_ID': stats[:, 0],
                       'Freq_Real': stats[:, 1],
                       'Freq_Rand': stats[:, 2],
                       'sigma': stats[:, 3]})
    # 95% band around the random-network frequency
    df["Under_Rand"] = df["Freq_Rand"] - 2*df["sigma"]
    df["Upper_Rand"] = df["Freq_Rand"] + 2*df["sigma"]
    significance_labels = []
    for _, row in df.iterrows():
        if row["Freq_Real"] < row["Under_Rand"]:
            significance_labels.append("anti-motif")
        elif row["Freq_Real"] > row["Upper_Rand"]:
            significance_labels.append("motif")
        else:
            significance_labels.append("not significant")
    df["Statistical_Significance"] = significance_labels
    df_motif = df[["Motif_ID","Freq_Real", "Statistical_Significance"]]
    df_motif.columns = ['Motif_ID', 'Frequency', 'Statistical_Significance']
    print(df_motif["Statistical_Significance"].value_counts())
    return df_motif
# 3-node motif statistics (mfinder output) for the full 64-channel network
motif3_open = motif_analysis("../motif/data/motif3_pdc_20_EO_MAT.txt")
motif3_open
motif3_closed = motif_analysis("../motif/data/motif3_pdc_20_EC_MAT.txt")
motif3_closed
def plot_motif36(PO = False):
    """Plot the subnetwork formed by motif-ID-36 occurrences (EO vs EC).

    Reads the *_MEMBERS.txt files produced by mfinder, rebuilds a directed
    graph from the listed (A, B, C) node triples (edges A->B and C->B), and
    highlights the nodes that most often act as the middle node B.
    PO=True restricts to the parieto-occipital files (Q3.3); otherwise the
    whole-scalp files are used (Q3.2).
    """
    file_head = "../motif/data/motif3_pdc_20_"
    if PO:
        file_tail = "_PO_MEMBERS.txt"
        region = "PO"
        savefilename = "../output/3.3_motif36_PO.png"
    else:
        file_tail = "_MEMBERS.txt"
        region = "ALL"
        savefilename = "../output/3.2_motif36_ALL.png"
    fig, ax = plt.subplots(1, 2, figsize=((12, 5)))
    for idx, eyes in enumerate(["EO", "EC"]):
        # NOTE(review): the file handle is never closed — consider `with open(...)`
        file = open(file_head + eyes + file_tail , "r")
        # Import lists of nodes from file: each data line holds a node triple
        A, B, C =[], [] , []
        for i, line in enumerate(file):
            if i > 4:  # skip the mfinder header lines
                nodes = line.split("\t")[:-1]
                if len(nodes) == 0:
                    break  # blank line terminates the members section
                A.append(int(nodes[0]))
                B.append(int(nodes[1]))
                C.append(int(nodes[2]))
        shown_nodes = list(set(A + B + C))
        # Create network: A->B and C->B for every triple (deduplicated)
        G = nx.DiGraph()
        G.add_nodes_from(shown_nodes)
        for i in range(len(A)):
            if not G.has_edge(A[i], B[i]):
                G.add_edge(A[i], B[i])
            if not G.has_edge(C[i], B[i]):
                G.add_edge(C[i], B[i])
        # motif ID 36 analysis: count how often each channel is the middle node B
        B_count = {label_dict[key]: value for key, value in Counter(B).items()}
        B_count = sorted(B_count.items(), key=lambda kv: kv[1], reverse=True)
        colors = ["red", "orange"]  # orange marks the most frequent B nodes
        label_map = {v: k for k, v in label_dict.items()}
        # Q3.3 parieto-occipital scalp region --------------------------------------------------------------------------------------------------------
        if PO:
            top10_B = [node[0] for node in B_count[:10]]
            top10_B_ID = [label_map[node] for node in top10_B]
            # restrict positions/labels to the nodes actually present
            positions_PO = {key:value for key, value in positions.items() if key in shown_nodes}
            label_dict_PO = {key:value for key, value in label_dict.items() if key in shown_nodes}
            color_map = np.zeros(len(shown_nodes))
            for i, node in enumerate(sorted(shown_nodes)):
                if node in top10_B_ID:
                    color_map[i] = 1
            color_map = [colors[int(i)] for i in color_map]
            # Plot the network
            nx.draw_networkx_nodes(G, positions_PO, node_color = color_map, node_shape="h", node_size=450, alpha=0.8, ax = ax[idx])
            nx.draw_networkx_edges(G, positions_PO, arrowstyle="-|>", width=0.6, alpha=0.2, ax = ax[idx])
            nx.draw_networkx_labels(G, positions_PO, label_dict_PO, font_size=10, alpha=0.1, ax = ax[idx])
        # Q3.2 ----------------------------------------------------------------------------------------------------------------------------------
        else:
            # NOTE(review): despite the name, this takes the top 20 B nodes — confirm intended
            top10_B = [node[0] for node in B_count[:20]]
            color_map = np.zeros(len(label_map))
            for node in top10_B:
                color_map[label_map[node]] = 1
            color_map = [colors[int(i)] for i in color_map]
            # Plot the network
            nx.draw_networkx_nodes(G, positions, node_color = color_map, node_shape="h", node_size=450, alpha=0.8, ax = ax[idx])
            nx.draw_networkx_edges(G, positions, arrowstyle="-|>", width=0.6, alpha=0.2, ax = ax[idx])
            nx.draw_networkx_labels(G, positions, label_dict, font_size=10, alpha=0.1, ax = ax[idx])
        ax[idx].set_title("motif36 " + eyes + " " + region)
        ax[idx].grid()
        plt.xlim((-0.6, 0.6))
    plt.savefig(savefilename)
    plt.show()
plot_motif36()
# Parieto-occipital ("Po") channels and their complement
Po_dict = {key:value for key, value in label_dict.items() if "Po" in value}
Po_ID = list(Po_dict.keys())
Not_Po_ID = [i for i in range(0,64) if i not in Po_ID]
# Eyes Open ---------------------------------------------
# Keep only edges touching a Po channel.
# NOTE(review): this zeroes pdc_binary_matrix["EO"][20] IN PLACE, mutating the
# stored binary matrix used by later cells — confirm intended.
matrix = pdc_binary_matrix["EO"][20]
for i in Not_Po_ID:
    for j in Not_Po_ID:
        matrix[i][j] = 0
df = create_df_bridges(matrix)
df.to_csv("../motif/data/pdc_20_open_PO.txt", sep = "\t", index=False, header=False)
# Eyes Closed ---------------------------------------------
matrix = pdc_binary_matrix["EC"][20]
for i in Not_Po_ID:
    for j in Not_Po_ID:
        matrix[i][j] = 0
df = create_df_bridges(matrix)
df.to_csv("../motif/data/pdc_20_closed_PO.txt", sep = "\t", index=False, header=False)
# 3-node motif statistics restricted to the parieto-occipital region
motif3_open_PO = motif_analysis("../motif/data/motif3_pdc_20_EO_PO_MAT.txt")
motif3_open_PO
motif3_closed_PO = motif_analysis("../motif/data/motif3_pdc_20_EC_PO_MAT.txt")
motif3_closed_PO
plot_motif36(PO=True)
# 4-node motif statistics for the full network; show the ten most frequent
motif4_open = motif_analysis("../motif/data/motif4_pdc_20_EO_MAT.txt")
motif4_open.sort_values(by=["Frequency"], ascending = False).head(10)
motif4_closed = motif_analysis("../motif/data/motif4_pdc_20_EC_MAT.txt")
motif4_closed.sort_values(by=["Frequency"], ascending = False).head(10)
# Divide nodes into partitions (communities)
def community_detection(matrix):
    """Partition the graph built from `matrix` into communities.

    Runs the Louvain algorithm (python-louvain,
    https://github.com/taynaud/python-louvain) on the UN-directed graph,
    prints a per-community summary, and returns the graph together with a
    mapping {community id -> list of channel labels}.
    """
    G = nx.from_numpy_matrix(matrix)  # use only UN-Directed graph
    # Louvain partition {node -> community id}; fixed seed for reproducibility
    best_partitions = community.best_partition(G, random_state=1111)
    # Group channel labels by community id
    nodes_community = defaultdict(list)
    for node, comm_id in best_partitions.items():
        nodes_community[comm_id].append(label_dict[node])
    print("Number of communities:", len(nodes_community))
    for comm_id in range(len(nodes_community)):
        print("Community ", comm_id)
        print("Number of Nodes", len(nodes_community[comm_id]))
        print("\t", nodes_community[comm_id])
    return G, nodes_community
def plot_community(estimator):
    """Plot the Louvain communities of the 30%-density network (EO vs EC).

    estimator : "PDC" selects pdc_binary_matrix, anything else dtf_binary_matrix.
    Saves the two-panel figure to ../output/4.2_<estimator>.png and shows it.
    """
    fig, ax = plt.subplots(1, 2, figsize=((12, 5)))
    if estimator == "PDC":
        matrixes = pdc_binary_matrix
    else:
        matrixes = dtf_binary_matrix
    for idx, eyes in enumerate(["EO", "EC"]):
        print("\n", eyes, " ---------------------------------------------------------------------------------------------------------------------------------- \n")
        matrix = matrixes[eyes][30]
        name = "Community " + str(estimator) + " " + str(eyes) + " 30% density"
        # NOTE(review): this local `community` shadows the python-louvain module
        # inside the function body (harmless here, since it is assigned first).
        G, community = community_detection(matrix)
        # One color per community; NOTE(review): more than 5 communities would
        # raise an IndexError on this palette.
        colors = ["r", "b", "g", "orange", "pink"]
        label_map = {v: k for k, v in label_dict.items()}
        color_map = np.zeros(len(label_map))
        for i in range(len(community)):
            nodes = [label_map[node] for node in sorted(community[i])]
            color_map[nodes] = i
        color_map = [colors[int(i)] for i in color_map]
        nx.draw_networkx_nodes(G, positions, node_color = color_map, node_shape="h", node_size=500, alpha=0.6, ax = ax[idx])
        nx.draw_networkx_labels(G, positions, label_dict, font_size=10, alpha=0.1, ax = ax[idx])
        ax[idx].grid()
        ax[idx].set_title(name)
    plt.savefig("../output/4.2_" + estimator + ".png")
    plt.show()
plot_community("PDC")
plot_community("DTF")